From: Keir Fraser Date: Tue, 18 May 2010 14:13:45 +0000 (+0100) Subject: x86: Dynamically allocate percpu data area when a CPU comes online. X-Git-Tag: archive/raspbian/4.8.0-1+rpi1~1^2~12127 X-Git-Url: https://dgit.raspbian.org/%22http:/www.example.com/cgi/%22https:/%22bookmarks:/%22man:///%22http:/www.example.com/cgi/%22https:/%22bookmarks:/%22man:/?a=commitdiff_plain;h=00c51c29c14611cdb4164fc64e89b6e7ad6c1df2;p=xen.git x86: Dynamically allocate percpu data area when a CPU comes online. At the same time, the data area starts life zeroed. Signed-off-by: Keir Fraser --- diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile index ba5da39e4b..9aeb1cebb7 100644 --- a/xen/arch/x86/Makefile +++ b/xen/arch/x86/Makefile @@ -36,6 +36,7 @@ obj-y += mpparse.o obj-y += nmi.o obj-y += numa.o obj-y += pci.o +obj-y += percpu.o obj-y += physdev.o obj-y += setup.o obj-y += shutdown.o diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c index c0b4f2289d..b18316e7da 100644 --- a/xen/arch/x86/irq.c +++ b/xen/arch/x86/irq.c @@ -50,9 +50,7 @@ static struct timer *__read_mostly irq_guest_eoi_timer; static DEFINE_SPINLOCK(vector_lock); -DEFINE_PER_CPU(vector_irq_t, vector_irq) = { - [0 ... 
NR_VECTORS - 1] = -1 -}; +DEFINE_PER_CPU(vector_irq_t, vector_irq); DEFINE_PER_CPU(struct cpu_user_regs *, __irq_regs); @@ -269,7 +267,10 @@ int init_irq_data(void) { struct irq_desc *desc; struct irq_cfg *cfg; - int irq; + int irq, vector; + + for (vector = 0; vector < NR_VECTORS; ++vector) + this_cpu(vector_irq)[vector] = -1; irq_desc = xmalloc_array(struct irq_desc, nr_irqs); irq_cfg = xmalloc_array(struct irq_cfg, nr_irqs); diff --git a/xen/arch/x86/nmi.c b/xen/arch/x86/nmi.c index ee954adc6a..703294329c 100644 --- a/xen/arch/x86/nmi.c +++ b/xen/arch/x86/nmi.c @@ -230,7 +230,7 @@ static inline void write_watchdog_counter(const char *descr) do_div(count, nmi_hz); if(descr) - Dprintk("setting %s to -0x%08Lx\n", descr, count); + Dprintk("setting %s to -0x%"PRIx64"\n", descr, count); wrmsrl(nmi_perfctr_msr, 0 - count); } diff --git a/xen/arch/x86/percpu.c b/xen/arch/x86/percpu.c new file mode 100644 index 0000000000..ea82647b42 --- /dev/null +++ b/xen/arch/x86/percpu.c @@ -0,0 +1,69 @@ +#include <xen/config.h> +#include <xen/percpu.h> +#include <xen/cpu.h> +#include <xen/init.h> +#include <xen/mm.h> + +unsigned long __per_cpu_offset[NR_CPUS]; +#define INVALID_PERCPU_AREA (-(long)__per_cpu_start) +#define PERCPU_ORDER (get_order_from_bytes(__per_cpu_data_end-__per_cpu_start)) + +void __init percpu_init_areas(void) +{ + unsigned int cpu; + for ( cpu = 1; cpu < NR_CPUS; cpu++ ) + __per_cpu_offset[cpu] = INVALID_PERCPU_AREA; +} + +static int init_percpu_area(unsigned int cpu) +{ + char *p; + if ( __per_cpu_offset[cpu] != INVALID_PERCPU_AREA ) + return 0; + if ( (p = alloc_xenheap_pages(PERCPU_ORDER, 0)) == NULL ) + return -ENOMEM; + memset(p, 0, __per_cpu_data_end - __per_cpu_start); + __per_cpu_offset[cpu] = p - __per_cpu_start; + return 0; +} + +static void free_percpu_area(unsigned int cpu) +{ + char *p = __per_cpu_start + __per_cpu_offset[cpu]; + free_xenheap_pages(p, PERCPU_ORDER); + __per_cpu_offset[cpu] = INVALID_PERCPU_AREA; +} + +static int cpu_percpu_callback( + struct notifier_block *nfb, unsigned long action, void *hcpu) 
+{ + unsigned int cpu = (unsigned long)hcpu; + int rc = 0; + + switch ( action ) + { + case CPU_UP_PREPARE: + rc = init_percpu_area(cpu); + break; + case CPU_UP_CANCELED: + case CPU_DEAD: + free_percpu_area(cpu); + break; + default: + break; + } + + return !rc ? NOTIFY_DONE : notifier_from_errno(rc); +} + +static struct notifier_block cpu_percpu_nfb = { + .notifier_call = cpu_percpu_callback, + .priority = 100 /* highest priority */ +}; + +static int __init percpu_presmp_init(void) +{ + register_cpu_notifier(&cpu_percpu_nfb); + return 0; +} +presmp_initcall(percpu_presmp_init); diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c index 6140820a24..8999406642 100644 --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -107,12 +107,6 @@ unsigned long __initdata xenheap_initial_phys_start; unsigned long __read_mostly xenheap_phys_end; #endif -DEFINE_PER_CPU_READ_MOSTLY(struct desc_struct *, gdt_table) = boot_cpu_gdt_table; -#ifdef CONFIG_COMPAT -DEFINE_PER_CPU_READ_MOSTLY(struct desc_struct *, compat_gdt_table) - = boot_cpu_compat_gdt_table; -#endif - DEFINE_PER_CPU(struct tss_struct, init_tss); char __attribute__ ((__section__(".bss.stack_aligned"))) cpu0_stack[STACK_SIZE]; @@ -192,46 +186,6 @@ static void free_xen_data(char *s, char *e) } extern char __init_begin[], __init_end[], __bss_start[]; -extern char __per_cpu_start[], __per_cpu_data_end[]; - -static void __init percpu_init_areas(void) -{ - unsigned int i, data_size = __per_cpu_data_end - __per_cpu_start; - - BUG_ON((unsigned long)__per_cpu_start & ~PAGE_MASK); - BUG_ON((unsigned long)__per_cpu_data_end & ~PAGE_MASK); - BUG_ON(data_size > PERCPU_SIZE); - - /* Initialise per-cpu data area for all possible secondary CPUs. 
*/ - for ( i = 1; i < NR_CPUS; i++ ) - memcpy(__per_cpu_start + (i << PERCPU_SHIFT), - __per_cpu_start, - data_size); -} - -static void __init percpu_free_unused_areas(void) -{ - unsigned int i, data_size = __per_cpu_data_end - __per_cpu_start; - unsigned int first_unused; - - /* Find first 'impossible' secondary CPU. */ - for ( i = 1; i < NR_CPUS; i++ ) - if ( !cpu_possible(i) ) - break; - first_unused = i; - - /* Check that there are no holes in cpu_possible_map. */ - for ( ; i < NR_CPUS; i++ ) - BUG_ON(cpu_possible(i)); - - /* Free all unused per-cpu data areas. */ - free_xen_data(&__per_cpu_start[first_unused << PERCPU_SHIFT], __bss_start); - - if ( data_size != PERCPU_SIZE ) - for ( i = 0; i < first_unused; i++ ) - free_xen_data(&__per_cpu_start[(i << PERCPU_SHIFT) + data_size], - &__per_cpu_start[(i+1) << PERCPU_SHIFT]); -} static void __init init_idle_domain(void) { @@ -1013,8 +967,6 @@ void __init __start_xen(unsigned long mbi_p) init_apic_mappings(); - percpu_free_unused_areas(); - init_IRQ(); xsm_init(&initrdidx, mbi, initial_images_start); @@ -1200,7 +1152,7 @@ int xen_in_range(unsigned long mfn) paddr_t start, end; int i; - enum { region_s3, region_text, region_percpu, region_bss, nr_regions }; + enum { region_s3, region_text, region_bss, nr_regions }; static struct { paddr_t s, e; } xen_regions[nr_regions]; @@ -1214,10 +1166,6 @@ int xen_in_range(unsigned long mfn) /* hypervisor code + data */ xen_regions[region_text].s =__pa(&_stext); xen_regions[region_text].e = __pa(&__init_begin); - /* per-cpu data */ - xen_regions[region_percpu].s = __pa(__per_cpu_start); - xen_regions[region_percpu].e = xen_regions[region_percpu].s + - (((paddr_t)last_cpu(cpu_possible_map) + 1) << PERCPU_SHIFT); /* bss */ xen_regions[region_bss].s = __pa(&__bss_start); xen_regions[region_bss].e = __pa(&_end); @@ -1226,25 +1174,8 @@ int xen_in_range(unsigned long mfn) start = (paddr_t)mfn << PAGE_SHIFT; end = start + PAGE_SIZE; for ( i = 0; i < nr_regions; i++ ) - { - if ( (start 
>= xen_regions[i].e) || (end <= xen_regions[i].s) ) - continue; - - if ( i == region_percpu ) - { - /* - * Check if the given page falls into an unused (and therefore - * freed) section of the per-cpu data space. Each CPU's data - * area is page-aligned, so the following arithmetic is safe. - */ - unsigned int off = ((start - __pa(__per_cpu_start)) - & (PERCPU_SIZE - 1)); - unsigned int data_sz = __per_cpu_data_end - __per_cpu_start; - return off < data_sz; - } - - return 1; - } + if ( (start < xen_regions[i].e) && (end > xen_regions[i].s) ) + return 1; return 0; } diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c index 8db5d0a152..53925beae7 100644 --- a/xen/arch/x86/smpboot.c +++ b/xen/arch/x86/smpboot.c @@ -487,7 +487,7 @@ static int wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) for ( i = 0; i < num_starts; i++ ) { - Dprintk("Sending STARTUP #%d.\n",j); + Dprintk("Sending STARTUP #%d.\n", i+1); apic_read_around(APIC_SPIV); apic_write(APIC_ESR, 0); apic_read(APIC_ESR); diff --git a/xen/arch/x86/tboot.c b/xen/arch/x86/tboot.c index 9cbed0b09f..63aba56820 100644 --- a/xen/arch/x86/tboot.c +++ b/xen/arch/x86/tboot.c @@ -357,7 +357,7 @@ void tboot_shutdown(uint32_t shutdown_type) /* * Xen regions for tboot to MAC */ - g_tboot_shared->num_mac_regions = 4; + g_tboot_shared->num_mac_regions = 3; /* S3 resume code (and other real mode trampoline code) */ g_tboot_shared->mac_regions[0].start = bootsym_phys(trampoline_start); g_tboot_shared->mac_regions[0].size = bootsym_phys(trampoline_end) - @@ -366,13 +366,9 @@ void tboot_shutdown(uint32_t shutdown_type) g_tboot_shared->mac_regions[1].start = (uint64_t)__pa(&_stext); g_tboot_shared->mac_regions[1].size = __pa(&__init_begin) - __pa(&_stext); - /* per-cpu data */ - g_tboot_shared->mac_regions[2].start = (uint64_t)__pa(&__per_cpu_start); - g_tboot_shared->mac_regions[2].size = - (((uint64_t)last_cpu(cpu_possible_map) + 1) << PERCPU_SHIFT); /* bss */ - g_tboot_shared->mac_regions[3].start = 
(uint64_t)__pa(&__bss_start); - g_tboot_shared->mac_regions[3].size = __pa(&_end) - __pa(&__bss_start); + g_tboot_shared->mac_regions[2].start = (uint64_t)__pa(&__bss_start); + g_tboot_shared->mac_regions[2].size = __pa(&_end) - __pa(&__bss_start); /* * MAC domains and other Xen memory diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c index f6ef710154..9d965a786b 100644 --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -83,6 +83,11 @@ string_param("nmi", opt_nmi); DEFINE_PER_CPU_READ_MOSTLY(u32, ler_msr); +DEFINE_PER_CPU_READ_MOSTLY(struct desc_struct *, gdt_table); +#ifdef CONFIG_COMPAT +DEFINE_PER_CPU_READ_MOSTLY(struct desc_struct *, compat_gdt_table); +#endif + /* Master table, used by CPU0. */ idt_entry_t idt_table[IDT_ENTRIES]; @@ -3290,6 +3295,11 @@ void __init trap_init(void) /* CPU0 uses the master IDT. */ idt_tables[0] = idt_table; + this_cpu(gdt_table) = boot_cpu_gdt_table; +#ifdef CONFIG_COMPAT + this_cpu(compat_gdt_table) = boot_cpu_compat_gdt_table; +#endif + percpu_traps_init(); cpu_init(); diff --git a/xen/arch/x86/x86_32/supervisor_mode_kernel.S b/xen/arch/x86/x86_32/supervisor_mode_kernel.S index 2719aaf25a..a9560b7ee6 100644 --- a/xen/arch/x86/x86_32/supervisor_mode_kernel.S +++ b/xen/arch/x86/x86_32/supervisor_mode_kernel.S @@ -102,7 +102,7 @@ ENTRY(fixup_ring0_guest_stack) movl $PER_CPU_GDT_ENTRY*8,%ecx lsll %ecx,%ecx - shll $PERCPU_SHIFT,%ecx + movl __per_cpu_offset(,%ecx,4),%ecx addl $per_cpu__init_tss,%ecx # Load Xen stack from TSS. diff --git a/xen/arch/x86/xen.lds.S b/xen/arch/x86/xen.lds.S index 31ac018e71..2bc66042de 100644 --- a/xen/arch/x86/xen.lds.S +++ b/xen/arch/x86/xen.lds.S @@ -59,7 +59,7 @@ SECTIONS CONSTRUCTORS } :text - . = ALIGN(128); + . = ALIGN(SMP_CACHE_BYTES); .data.read_mostly : { *(.data.read_mostly) } :text @@ -71,7 +71,7 @@ SECTIONS __lock_profile_end = .; #endif - . = ALIGN(4096); /* Init code and data */ + . 
= ALIGN(PAGE_SIZE); /* Init code and data */ __init_begin = .; .init.text : { _sinittext = .; @@ -99,33 +99,22 @@ SECTIONS *(.xsm_initcall.init) __xsm_initcall_end = .; } :text - . = ALIGN(PAGE_SIZE); + . = ALIGN(STACK_SIZE); __init_end = .; - __per_cpu_shift = PERCPU_SHIFT; /* kdump assist */ - .data.percpu : { - __per_cpu_start = .; - *(.data.percpu) - . = ALIGN(SMP_CACHE_BYTES); - *(.data.percpu.read_mostly) - . = ALIGN(PAGE_SIZE); - __per_cpu_data_end = .; - } :text - . = __per_cpu_start + (NR_CPUS << PERCPU_SHIFT); - . = ALIGN(PAGE_SIZE); - - /* - * Do not insert anything here - the unused portion of .data.percpu - * will be freed/unmapped up to __bss_start (defined below). - */ - .bss : { /* BSS */ - . = ALIGN(STACK_SIZE); __bss_start = .; *(.bss.stack_aligned) . = ALIGN(PAGE_SIZE); *(.bss.page_aligned) *(.bss) + . = ALIGN(SMP_CACHE_BYTES); + __per_cpu_start = .; + *(.bss.percpu) + . = ALIGN(SMP_CACHE_BYTES); + *(.bss.percpu.read_mostly) + . = ALIGN(SMP_CACHE_BYTES); + __per_cpu_data_end = .; } :text _end = . 
; diff --git a/xen/common/rcupdate.c b/xen/common/rcupdate.c index 827b8bd5ca..c8f44bb804 100644 --- a/xen/common/rcupdate.c +++ b/xen/common/rcupdate.c @@ -53,7 +53,7 @@ struct rcu_ctrlblk rcu_ctrlblk = { .cpumask = CPU_MASK_NONE, }; -DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L }; +DEFINE_PER_CPU(struct rcu_data, rcu_data); static int blimit = 10; static int qhimark = 10000; diff --git a/xen/common/sched_credit.c b/xen/common/sched_credit.c index 6dff698a64..0aeae90cb7 100644 --- a/xen/common/sched_credit.c +++ b/xen/common/sched_credit.c @@ -231,7 +231,7 @@ static void burn_credits(struct csched_vcpu *svc, s_time_t now) static int opt_tickle_one_idle __read_mostly = 1; boolean_param("tickle_one_idle_cpu", opt_tickle_one_idle); -DEFINE_PER_CPU(unsigned int, last_tickle_cpu) = 0; +DEFINE_PER_CPU(unsigned int, last_tickle_cpu); static inline void __runq_tickle(unsigned int cpu, struct csched_vcpu *new) diff --git a/xen/drivers/cpufreq/utility.c b/xen/drivers/cpufreq/utility.c index 612f7e0919..3085173ae0 100644 --- a/xen/drivers/cpufreq/utility.c +++ b/xen/drivers/cpufreq/utility.c @@ -36,7 +36,7 @@ struct cpufreq_driver *cpufreq_driver; struct processor_pminfo *__read_mostly processor_pminfo[NR_CPUS]; struct cpufreq_policy *__read_mostly cpufreq_cpu_policy[NR_CPUS]; -DEFINE_PER_CPU(spinlock_t, cpufreq_statistic_lock) = SPIN_LOCK_UNLOCKED; +DEFINE_PER_CPU(spinlock_t, cpufreq_statistic_lock); /********************************************************************* * Px STATISTIC INFO * @@ -95,6 +95,8 @@ int cpufreq_statistic_init(unsigned int cpuid) spinlock_t *cpufreq_statistic_lock = &per_cpu(cpufreq_statistic_lock, cpuid); + spin_lock_init(cpufreq_statistic_lock); + if ( !pmpt ) return -EINVAL; diff --git a/xen/include/asm-x86/percpu.h b/xen/include/asm-x86/percpu.h index d8860203ae..fb1d4140ec 100644 --- a/xen/include/asm-x86/percpu.h +++ b/xen/include/asm-x86/percpu.h @@ -1,17 +1,20 @@ #ifndef __X86_PERCPU_H__ #define __X86_PERCPU_H__ -#define PERCPU_SHIFT 13 
-#define PERCPU_SIZE (1UL << PERCPU_SHIFT) +#ifndef __ASSEMBLY__ +extern char __per_cpu_start[], __per_cpu_data_end[]; +extern unsigned long __per_cpu_offset[NR_CPUS]; +void percpu_init_areas(void); +#endif /* Separate out the type, so (int[3], foo) works. */ #define __DEFINE_PER_CPU(type, name, suffix) \ - __attribute__((__section__(".data.percpu" #suffix))) \ + __attribute__((__section__(".bss.percpu" #suffix))) \ __typeof__(type) per_cpu_##name /* var is in discarded region: offset to particular copy we want */ #define per_cpu(var, cpu) \ - (*RELOC_HIDE(&per_cpu__##var, ((unsigned int)(cpu))<